#clean up
rm(list=ls())

#set seed
set.seed(715130425)

#load libraries
library(akima)
library(geoR)
library(scatterplot3d)  
library(ggmap)
library(lattice)
library(foreign)
library(maps)
library(spdep)
#install.packages('/Volumes/rgdal/rgdal_0.9-1.tgz',repos=NULL)
library(rgdal)
#install.packages('/Users/jamie/Downloads/prevR_2.9.tgz',repos=NULL)
#library(prevR)
library(car)
library(xtable)
library(maptools)

#options
options(scipen=8)

#Set to working directory. Adjust the command below to reflect YOUR working directory.
#setwd('/Volumes/MONOGAN/gillAreal/ccesCode/')
#setwd('/Users/monogan/Documents/gillAreal/ccesCode/')
#setwd('/Users/jamie/Desktop/nhgis0003_shape/nhgis0003_shapefile_us_tract_1970/')

#load survey data 
combined<-read.dta('cces08reformattedFine.dta')
combined.48<-read.dta('cces08reformattedFine48.dta')
#plot(x=combined$eastings,y=combined$northings,pch=".")


###EMPIRICAL SEMIVARIOGRAM###
ideol.vario<-variog(coords=cbind(combined.48$eastings,combined.48$northings), data=combined.48$ideology)
ideol.robust<-variog(coords=cbind(combined.48$eastings,combined.48$northings), data=combined.48$ideology, estimator.type="modulus")

#pdf('rawVariogram.pdf')
plot(ideol.vario, xlab="Distance in Kilometers", ylab="Semivariance",ylim=c(600,800))
par(new=T)
plot(ideol.robust, xlab="", ylab="",axes=F,ylim=c(600,800),pch='+', col='blue')
#dev.off()

###QUADRATIC MODEL###
covariates<-combined.48
covariates$cathOrth<-covariates$cath+covariates$ortho
covariates$consRelig<-covariates$mormon+covariates$evan
covariates$musJew<-covariates$islam+covariates$jewish
ideol.ols<-lm(ideology~age*educ+as.factor(race)*female+inc14+cathOrth+consRelig+musJew+mainline+rural+ownership+as.factor(empstat)+eastings*northings+I(eastings^2)+I(northings^2),data=covariates);summary(ideol.ols)
se<-sqrt(diag(summary(ideol.ols)$cov.unscaled))*summary(ideol.ols)$sigma
xtable(cbind(ideol.ols$coefficients,se,confint(ideol.ols,level=.9)),digits=4)
#xtable(cbind(ideol.ols$coefficients,se,confint(ideol.ols,level=.9)),digits=10)
combined.48$ols.resid<-ideol.ols$residuals

###RESIDUAL SEMIVARIOGRAM###
resid.robust<-variog(coords=cbind(combined.48$eastings,combined.48$northings), data=combined.48$ols.resid, estimator.type="modulus")

###A FEW VARIOGRAM MODELS USING WEIGHTED LEAST SQAURES###
wave.fit <- variofit(resid.robust, ini.cov.pars = c(100,3000), cov.model="wave", fix.nugget=FALSE, nugget=700)
wave.fit
21764 ^(3/21764)* 6474488595/21764 #297896

exponential.fit <- variofit(resid.robust, ini.cov.pars = c(100,3000), cov.model="exponential", fix.nugget=FALSE, nugget=700)
exponential.fit
21764 ^(3/21764)* 6657344128/21764 #306309.3

matern.fit <- variofit(resid.robust, ini.cov.pars = c(100,3000), cov.model="matern", fix.nugget=FALSE, nugget=700, kappa=1)
matern.fit
21764 ^(3/21764)* 6482572896/21764 #298268

gaussian.fit <- variofit(resid.robust, ini.cov.pars = c(100,3000), cov.model="gaussian", fix.nugget=FALSE, nugget=700)
gaussian.fit
21764 ^(3/21764)* 6474161011/21764 #297880.9
#     tausq    sigmasq        phi 
#  625.9614   20.6852 9966.7919 

#graphical output of residual variogram model
#pdf('olsGaussianFINE1.pdf',width=5,height=5)
plot(ideol.robust, xlab="Distance in Kilometers", ylab="Semivariance",ylim=c(600,725))
par(new=T)
plot(resid.robust, xlab="", ylab="",axes=F,ylim=c(600,725),pch='+', col='blue')
lines(gaussian.fit, col='blue')
#dev.off()

#pdf('olsGaussianFINE2.pdf',width=5,height=5)
plot(resid.robust, xlab="Distance in Kilometers", ylab="Semivariance",ylim=c(615,645),pch='+', col='blue')
lines(gaussian.fit, col='blue')
#dev.off()


###OUT-OF-SAMPLE KRIGING TO PLACE ALASKA####
#prep data
ak.data<-subset(combined,subset=state=="AK")
ak.data$eastings<-ak.data$eastings+2287-2337
ak.data$northings<-ak.data$northings-3456-348.6
adj<-seq(0,1902,by=50)#how much to move northings
ak.sse<-rep(NA,length(adj))

#loop to try different placements
for (i in 1:length(adj)){
	ak.data$northings<-ak.data$northings+adj[i]
	ak.linear<-predict(ideol.ols,ak.data)#X\beta prediction of HI observations
	u.loci <- cbind(ak.data$eastings,ak.data$northings)
	ak.krige<-matrix(NA,nrow=dim(ak.data)[1],ncol=50)

	for(j in 1:50){
		obj<-cbind(combined.48$eastings,combined.48$northings,combined.48$ols.resid)[sample(1:21764,5000),]
		ideol.geo<-as.geodata(obj,coords.col=1:2,data.col=3)
		test.forecast<-krige.control(cov.pars=c(93.7866,23089.3975))
		ak.krige[,j]<-krige.conv(geodata=ideol.geo,locations=u.loci,borders=NULL,krige=test.forecast)$predict
		}
	
	avg.krige<-apply(ak.krige,1,mean)
	ak.sse[i]<-sum((ak.data$ideology-ak.linear-avg.krige)^2)
	}

#results
ak.sse
min(ak.sse);which.min(ak.sse);adj[which.min(ak.sse)]

###OUT-OF-SAMPLE KRIGING TO PLACE HAWAII####
#prep data
hi.data<-subset(combined,subset=state=="HI")
hi.data$eastings<-hi.data$eastings+6009-2337+75
hi.data$northings<-hi.data$northings-217-348.6
adj<-seq(0,1902,by=50)#how much to move northings
hi.sse<-rep(NA,length(adj))

#loop to try different placements
for (i in 1:length(adj)){
	hi.data$northings<-hi.data$northings+adj[i]
	hi.linear<-predict(ideol.ols,hi.data)#X\beta prediction of HI observations
	u.loci <- cbind(hi.data$eastings,hi.data$northings)
	hi.krige<-matrix(NA,nrow=dim(hi.data)[1],ncol=50)

	for(j in 1:50){
		obj<-cbind(combined.48$eastings,combined.48$northings,combined.48$ols.resid)[sample(1:21764,5000),]
		ideol.geo<-as.geodata(obj,coords.col=1:2,data.col=3)
		test.forecast<-krige.control(cov.pars=c(93.7866,23089.3975))
		hi.krige[,j]<-krige.conv(geodata=ideol.geo,locations=u.loci,borders=NULL,krige=test.forecast)$predict
		}
		
	avg.krige<-apply(hi.krige,1,mean)
	hi.sse[i]<-sum((hi.data$ideology-hi.linear-avg.krige)^2)
	}

#results
hi.sse
min(hi.sse);which.min(hi.sse);adj[which.min(hi.sse)]

###DRAW THE NEW MAP###
#refresh AK & HI data to get the eastings and northings right
ak.data<-subset(combined,subset=state=="AK")
ak.data$eastings<-ak.data$eastings+2287-2337
ak.data$northings<-ak.data$northings-3456-348.6+500

hi.data<-subset(combined,subset=state=="HI")
hi.data$eastings<-hi.data$eastings+6009-2337+75
hi.data$northings<-hi.data$northings-217-348.6+450

#plot it
#pdf('planA.pdf')
#pdf('planB.pdf')
plot(x=combined.48$eastings,y=combined.48$northings,pch=".",xlim=c(-3500,2100))
points(x=hi.data$eastings,y=hi.data$northings,pch=".",col='blue')
points(x=ak.data$eastings,y=ak.data$northings,pch=".",col='red')
#dev.off()

###plot SSE for each###
#write.csv(t(hi.sse))
#write.csv(t(ak.sse))
#hi.sse<-c(24273.5320815838,23592.4512136371,25603.4419663783,30333.8602931684,35542.1712158448,32233.1435509492,40790.2993502016,30222.2281671638,30645.2747183474,22284.5336888275,23302.122293276,24540.02902515,25066.9285049128,35868.102944328,48337.6357733322,70886.852218796,121886.279548529,177901.249523989,275048.957072699,409124.255489566,604132.615014424,887511.831855265,1309090.29417066,1907552.28206585,2491404.70966279,3501416.79046749,4789177.41859667,6253112.83025763,8264060.82549447,10908149.3129511,14348743.8190713,18411469.9646345,23260983.5021595,29489577.597364,37228628.898835,46428602.331189,57777556.6629137,71553373.0292408,87880653.8188897)
#ak.sse<-c(38446.7778481021,34952.3235994549,39844.6988497537,47539.7760454664,47896.3586247553,54783.481321664,48208.9241029181,48025.6531210989,43385.1388359243,37035.9375454677,31780.6407966582,36458.6788502418,38136.8111850592,49962.7871661608,64921.041442751,103300.984228293,141178.321791597,223004.190550988,258718.507619063,453590.981749617,707563.943026453,922332.522338255,1403837.53824891,1982090.58567184,2754914.79873191,3758159.43246304,5110151.92570641,6891208.24019821,9049921.79101113,11960049.4561741,15427804.8710901,19757575.4656664,25447286.3698546,32244596.6704835,40846353.8782011,51392758.3666752,63492495.4805032,78507516.7238591,96538951.8259819)

#> min(ak.sse);which.min(ak.sse);adj[which.min(ak.sse)]
#[1] 31780.64
#[1] 11
#[1] 500
#> min(hi.sse);which.min(hi.sse);adj[which.min(hi.sse)]
#[1] 22284.53
#[1] 10
#[1] 450

#pdf("wideSSE.pdf")
#postscript("wideSSE.eps")
plot(y=hi.sse,x=adj,type='l',xlab="Distance from Southernmost Pacific Coast (km)",ylab="Sum of Squared Errors",lwd=2)
lines(y=ak.sse,x=adj,lty=2,col='blue',lwd=2)
legend(x=0,y=20000000,lty=c(1,2),col=c("black","blue"),lwd=2,legend=c("HI","AK"))
#dev.off()

#pdf("narrowSSE.pdf")
#postscript("narrowSSE.eps")
plot(y=hi.sse,x=adj,type='l',xlim=c(adj[1],adj[15]),ylim=c(22880,60000),xlab="Distance from Southernmost Pacific Coast (km)",ylab="Sum of Squared Errors",lwd=2)
lines(y=ak.sse,x=adj,lty=2,col='blue',lwd=2)
legend(x=0,y=60000,lty=c(1,2),col=c("black","blue"),lwd=2,legend=c("HI","AK"))
points(x=adj[1],y=ak.sse[1],pch=2,col='blue')
points(x=adj[11],y=ak.sse[11],pch=2,col='blue')
text(x=adj[1]+20,y=ak.sse[1],labels=print(round(ak.sse[1])),col='blue',pos=1)
text(x=adj[11],y=ak.sse[11],labels=print(round(ak.sse[11])),col='blue',pos=1)
points(x=adj[1],y=hi.sse[1])
points(x=adj[10],y=hi.sse[10])
text(x=adj[1]+20,y=hi.sse[1],labels=print(round(hi.sse[1])),pos=1)
text(x=adj[10]+10,y=hi.sse[10]+300,labels=print(round(hi.sse[10])),pos=3)
#dev.off()
